/* -*- c -*- */

/*
 * The purpose of this module is to add faster sort functions
 * that are type-specific.  This is done by altering the
 * function table for the builtin descriptors.
 *
 * These sorting functions are copied almost directly from numarray
 * with a few modifications (complex comparisons compare the imaginary
 * part if the real parts are equal, for example), and the names
 * are changed.
 *
 * The original sorting code is due to Charles R. Harris who wrote
 * it for numarray.
 */

/*
 * Quick sort is usually the fastest, but the worst case scenario can
 * be slower than the merge and heap sorts.  The merge sort requires
 * extra memory and so for large arrays may not be useful.
 *
 * The merge sort is *stable*, meaning that equal components
 * are unmoved from their entry versions, so it can be used to
 * implement lexigraphic sorting on multiple keys.
 *
 * The heap sort is included for completeness.
 */

#define NPY_NO_DEPRECATED_API NPY_API_VERSION

#include "npy_sort.h"
#include "npysort_common.h"
#include <stdlib.h>

#define NOT_USED NPY_UNUSED(unused)
#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20
#define SMALL_STRING 16


/*
 *****************************************************************************
 **                            NUMERIC SORTS                                **
 *****************************************************************************
 */


/**begin repeat
 *
 * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
 *         LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
 *         CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA#
 * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
 *         longlong, ulonglong, half, float, double, longdouble,
 *         cfloat, cdouble, clongdouble, datetime, timedelta#
 * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
 *         npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
 *         npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
 *         npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
 */

int
heapsort_@suff@(void *start, npy_intp n, void *NOT_USED)
{
    @type@ tmp, *a;
    npy_intp i,j,l;

    /* The array needs to be offset by one for heapsort indexing */
    a = (@type@ *)start - 1;

    for (l = n>>1; l > 0; --l) {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) {
            if (j < n && @TYPE@_LT(a[j], a[j+1])) {
                j += 1;
            }
            if (@TYPE@_LT(tmp, a[j])) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    for (; n > 1;) {
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        for (i = 1, j = 2; j <= n;) {
            if (j < n && @TYPE@_LT(a[j], a[j+1])) {
                j++;
            }
            if (@TYPE@_LT(tmp, a[j])) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    return 0;
}


int
aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *NOT_USED)
{
    @type@ *v = vv;
    npy_intp *a, i,j,l, tmp;
    /* The arrays need to be offset by one for heapsort indexing */
    a = tosort - 1;

    for (l = n>>1; l > 0; --l) {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) {
            if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) {
                j += 1;
            }
            if (@TYPE@_LT(v[tmp], v[a[j]])) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    for (; n > 1;) {
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        for (i = 1, j = 2; j <= n;) {
            if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) {
                j++;
            }
            if (@TYPE@_LT(v[tmp], v[a[j]])) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    return 0;
}

/**end repeat**/


/*
 *****************************************************************************
 **                             STRING SORTS                                **
 *****************************************************************************
 */


/**begin repeat
 *
 * #TYPE = STRING, UNICODE#
 * #suff = string, unicode#
 * #type = npy_char, npy_ucs4#
 */

int
heapsort_@suff@(void *start, npy_intp n, void *varr)
{
    PyArrayObject *arr = varr;
    size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
    @type@ *tmp = malloc(PyArray_ITEMSIZE(arr));
    @type@ *a = (@type@ *)start - len;
    npy_intp i, j, l;

    if (tmp == NULL) {
        return -NPY_ENOMEM;
    }

    for (l = n>>1; l > 0; --l) {
        @TYPE@_COPY(tmp, a + l*len, len);
        for (i = l, j = l<<1; j <= n;) {
            if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len))
                j += 1;
            if (@TYPE@_LT(tmp, a + j*len, len)) {
                @TYPE@_COPY(a + i*len, a + j*len, len);
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        @TYPE@_COPY(a + i*len, tmp, len);
    }

    for (; n > 1;) {
        @TYPE@_COPY(tmp, a + n*len, len);
        @TYPE@_COPY(a + n*len, a + len, len);
        n -= 1;
        for (i = 1, j = 2; j <= n;) {
            if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len))
                j++;
            if (@TYPE@_LT(tmp, a + j*len, len)) {
                @TYPE@_COPY(a + i*len, a + j*len, len);
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        @TYPE@_COPY(a + i*len, tmp, len);
    }

    free(tmp);
    return 0;
}


int
aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *varr)
{
    @type@ *v = vv;
    PyArrayObject *arr = varr;
    size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
    npy_intp *a, i,j,l, tmp;

    /* The array needs to be offset by one for heapsort indexing */
    a = tosort - 1;

    for (l = n>>1; l > 0; --l) {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) {
            if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len))
                j += 1;
            if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    for (; n > 1;) {
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        for (i = 1, j = 2; j <= n;) {
            if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len))
                j++;
            if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    return 0;
}

/**end repeat**/


/*
 *****************************************************************************
 **                             GENERIC SORT                                **
 *****************************************************************************
 */


int
npy_heapsort(void *start, npy_intp num, void *varr)
{
    PyArrayObject *arr = varr;
    npy_intp elsize = PyArray_ITEMSIZE(arr);
    PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
    char *tmp = malloc(elsize);
    char *a = (char *)start - elsize;
    npy_intp i, j, l;

    if (tmp == NULL) {
        return -NPY_ENOMEM;
    }

    for (l = num >> 1; l > 0; --l) {
        GENERIC_COPY(tmp, a + l*elsize, elsize);
        for (i = l, j = l << 1; j <= num;) {
            if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) {
                ++j;
            }
            if (cmp(tmp, a + j*elsize, arr) < 0) {
                GENERIC_COPY(a + i*elsize, a + j*elsize, elsize);
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        GENERIC_COPY(a + i*elsize, tmp, elsize);
    }

    for (; num > 1;) {
        GENERIC_COPY(tmp, a + num*elsize, elsize);
        GENERIC_COPY(a + num*elsize, a + elsize, elsize);
        num -= 1;
        for (i = 1, j = 2; j <= num;) {
            if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) {
                ++j;
            }
            if (cmp(tmp, a + j*elsize, arr) < 0) {
                GENERIC_COPY(a + i*elsize, a + j*elsize, elsize);
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        GENERIC_COPY(a + i*elsize, tmp, elsize);
    }

    free(tmp);
    return 0;
}


int
npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr)
{
    char *v = vv;
    PyArrayObject *arr = varr;
    npy_intp elsize = PyArray_ITEMSIZE(arr);
    PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
    npy_intp *a, i, j, l, tmp;

    /* The array needs to be offset by one for heapsort indexing */
    a = tosort - 1;

    for (l = n >> 1; l > 0; --l) {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) {
            if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) {
                ++j;
            }
            if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    for (; n > 1;) {
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        for (i = 1, j = 2; j <= n;) {
            if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) {
                ++j;
            }
            if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else {
                break;
            }
        }
        a[i] = tmp;
    }

    return 0;
}
